from datetime import timedelta
from utils.operators.cluster_for_spark_sql_operator import SparkSqlOperator
from jms.dm.dm_transit_violation_sum_day_dt import jms_dm__dm_transit_violation_sum_day_dt

# Spark SQL task that runs
# jms/dm/dm_transit_violation_sum_day_push_dt/execute.sql on YARN (queue 'pro').
jms_dm__dm_transit_violation_sum_day_push_dt = SparkSqlOperator(
    task_id='jms_dm__dm_transit_violation_sum_day_push_dt',
    task_concurrency=1,
    pool_slots=4,
    master='yarn',
    # Average duration noted by the original author (from an Excel sheet):
    # 2 min 33 s. The timeout leaves generous headroom above that.
    execution_timeout=timedelta(minutes=30),
    email='jokic.wang@jtexpress.com',
    name='jms_dm__dm_transit_violation_sum_day_push_dt_{{ execution_date | date_add(1) | cst_ds }}',
    sql='jms/dm/dm_transit_violation_sum_day_push_dt/execute.sql',
    driver_memory='3G',
    driver_cores=1,
    executor_cores=4,
    executor_memory='4G',
    num_executors=8,
    conf={
        # Enable dynamic resource allocation.
        'spark.dynamicAllocation.enabled': 'true',
        # External shuffle service, enabled alongside dynamic allocation.
        'spark.shuffle.service.enabled': 'true',
        'spark.dynamicAllocation.maxExecutors': 10,
        # Idle timeout (seconds) before executors holding cached data are released.
        'spark.dynamicAllocation.cachedExecutorIdleTimeout': 180,
        # Allow existing partitions to be overwritten (dynamic overwrite mode).
        'spark.sql.sources.partitionOverwriteMode': 'dynamic',
        'spark.sql.shuffle.partitions': 100,
        # Valid modes are 'strict' / 'nonstrict'; the previous value 'true' was
        # not a valid mode. Now matches the hiveconf setting below.
        'spark.hadoop.hive.exec.dynamic.partition.mode': 'nonstrict',
        'spark.yarn.executor.memoryOverhead': '2G',
        'spark.shuffle.file.buffer': '64k',
        'spark.executor.extraJavaOptions': '-XX:+UseG1GC -XX:ParallelGCThreads=4',
    },
    hiveconf={
        # Enable dynamic partitioning.
        'hive.exec.dynamic.partition': 'true',
        'hive.exec.dynamic.partition.mode': 'nonstrict',
        # Original note: about 20 partitions are generated per day; the limits
        # below are set to 200.
        'hive.exec.max.dynamic.partitions': 200,
        'hive.exec.max.dynamic.partitions.pernode': 200,
    },
    yarn_queue='pro',
)

# Declare the upstream dependency: the push task is wired to run after
# jms_dm__dm_transit_violation_sum_day_dt.
jms_dm__dm_transit_violation_sum_day_push_dt.set_upstream(
    [jms_dm__dm_transit_violation_sum_day_dt]
)
